# -*- coding: utf-8 -*-
"""
Created on Mon May 23 11:04:00 2016

@author: ppradeep
"""
#%%
## Import functions
import csv
from collections import Counter
import numpy

# Root directory of the project; every input and output file name used in
# this script is built by appending a relative path to this prefix.
path = 'W:/Projects/'

#%%
##******* User-defined functions *********

## Function to calculate classification metrics from a 2x2 confusion matrix
def metrics (t_p, t_n, f_p, f_n):
    """Summarise a binary confusion matrix.

    Parameters
    ----------
    t_p, t_n, f_p, f_n : int
        Counts of true positives, true negatives, false positives and
        false negatives.

    Returns
    -------
    tuple
        ``(total, acc, sens, spec, ba, kappa)`` where ``total`` is the number
        of predictions (float), ``acc``/``sens``/``spec``/``ba`` are
        percentages rounded to 2 decimals and ``kappa`` is Cohen's kappa
        rounded to 2 decimals.

    Notes
    -----
    * Sensitivity is TP / (TP + FN) and specificity is TN / (TN + FP).  The
      previous implementation divided by (TP + FP) and (TN + FN), which are
      precision and negative predictive value, not sensitivity/specificity.
    * Any ratio whose denominator is zero (no predictions at all, or an empty
      class) is reported as 0.0 instead of raising ZeroDivisionError.
    """
    def _pct(numerator, denominator):
        # Percentage rounded to 2 decimals; 0.0 when the ratio is undefined.
        if not denominator:
            return 0.0
        return round(100*float(numerator)/float(denominator), 2)

    total = float(t_p + t_n + f_p + f_n)
    if total == 0:
        # Nothing was predicted, so every metric is undefined.
        return total, 0.0, 0.0, 0.0, 0.0, 0.0

    acc = _pct(t_p + t_n, total)
    sens = _pct(t_p, t_p + f_n)   # recall on the actual positives
    spec = _pct(t_n, t_n + f_p)   # recall on the actual negatives
    ba = round((sens + spec)/2, 2)

    # Cohen's kappa: observed agreement corrected for chance agreement.
    p_o = float(t_p + t_n)/total
    p_e = ((t_p + f_n)/total)*((t_p + f_p)/total) + ((f_p + t_n)/total)*((f_n + t_n)/total)
    if p_e == 1:
        # Only one class present and predicted: kappa is 0/0.
        kappa = 0.0
    else:
        kappa = round(((p_o - p_e)/(1 - p_e)), 2)
    return total, acc, sens, spec, ba, kappa

 
#%%
## ********** Input: CERAPP data file. Used to:
## ********** 1. Create a dictionary of chemicals with >= 4 literature sources,
## **********    mark each chemical as hindered or non-hindered, and save its experimental binding data
## ********** Output: New data file with chemicals with >= 4 lit sources and non-null binding data


# Load the CERAPP phenol table.
#   all_p    : every phenol ID found in the file (column 0)
#   p_data   : ID -> ['NHP' | 'HP', binding class] for rows with >= 4 sources
#   all_hp   : IDs of the rows classified 'HP' (same filter)
#   hp_data  : ID -> ['HP', binding class] for those rows
cerapp_casrn = {}
all_p = []; all_hp = []
p_data = {}; hp_data = {}

# `with` guarantees the handle is released even if a row fails to parse.
with open(path+'HinderedPhenols-ReadAcross/CERAPP_Phenols.csv','r') as f0:
    readCSV0 = csv.reader(f0, delimiter=',')
    header = next(readCSV0)  # skip the title row (next() works on Python 2.6+ and 3)

    for line in readCSV0:
        phenol_id = line[0]
        all_p.append(phenol_id)
        class_b = line[10]  # Binder or not. 0 or 1.
        if int(line[9]) < 4:
            # Fewer than 4 literature sources: not used any further.
            continue
        n_H = line[7]   # Number of hindered phenolic groups in the chemical
        n_NH = line[8]  # Number of non-hindered phenolic groups in the chemical
        if n_H == '0' and n_NH != '0':
            p_data[phenol_id] = ['NHP', class_b]  # Not HP
        else:
            # NOTE(review): rows with n_H == '0' and n_NH == '0' also land
            # here and are treated as hindered phenols - confirm intended.
            p_data[phenol_id] = ['HP', class_b]  # HP
            all_hp.append(phenol_id)
            hp_data[phenol_id] = ['HP', class_b]

#########################################################################   
## ******* Select neighbors for each of the descriptor classes ********** 
## ******* and save them in sorted by distance order ********************
#########################################################################

## ****** 1. MoSS MCSS *****
# Builds neighbors_m: hindered-phenol ID -> list of [neighbor ID, value],
# ordered by descending value, restricted to neighbors present in p_data
# whose value is >= 0.70.

# Read distance data
dist_mat = []
cerapp_id_m = []  # List of all ids in MOSS distance file
with open(path+'HinderedPhenols-ReadAcross/DistanceFiles/Cerapp-ReadAcross-MoSS-Ds.csv','r') as f1:
    readCSV1 = csv.reader(f1, delimiter=',')
    header = next(readCSV1)
    for line in readCSV1:
        cerapp_id_m.append(line[0])
        # Columns 5.. hold the values; one extra blank column is appended to
        # compensate for the blank entry of each chemical with itself.
        dist_mat.append(line[5:] + [''])

## Generate full distance matrix
# full_dist_mat is the *same* list object as dist_mat: the upper triangle is
# filled in place by mirroring entries read from the lower triangle.
# NOTE(review): this presumes every row has at least `size` columns - confirm
# against the file layout.
size = len(dist_mat)
full_dist_mat = dist_mat
for i in range(size):
    full_dist_mat[i][i] = '0.0'
    for j in range(i+1, size):
        full_dist_mat[i][j] = dist_mat[j][i]

# Create a dictionary of each phenol and sort its neighbors by distance
_hp_ids_m = set(all_hp)  # O(1) membership instead of scanning the list per pair
neighbors_m = {}
for idx, distances in enumerate(full_dist_mat):
    key = cerapp_id_m[idx]
    if key not in _hp_ids_m:
        continue  # only hindered phenols get a neighbor list
    # [::-1] reverses the ascending argsort result, i.e. descending order.
    # NOTE(review): the values are still strings here, so the ordering is
    # lexicographic rather than numeric - verify this is acceptable for the
    # number format used in the file.
    index_sorted = numpy.argsort(distances)[::-1]
    for index in index_sorted:
        n = cerapp_id_m[index]
        if key != n and n in p_data and float(distances[index]) >= 0.70:
            neighbors_m.setdefault(key,[]).append([n, distances[index]])

## ***** 2. Pubchem *****
# Builds neighbors_p: hindered-phenol ID -> list of [neighbor ID, value],
# ordered by descending value, restricted to neighbors present in p_data.
# Unlike the MoSS section, no minimum value is required here.
_hp_ids_p = set(all_hp)  # O(1) membership instead of scanning the list per pair
neighbors_p = {}

with open(path+'HinderedPhenols-ReadAcross/DistanceFiles/Cerapp-ReadAcross-Pubchem-TDs.csv','r') as f2:
    readCSV2 = csv.reader(f2, delimiter=',')
    # From column 5 on, the title row carries the ID of each neighbor column.
    header = next(readCSV2)[5:]

    for line in readCSV2:
        key = line[0]  # CERAPP ID
        if key not in _hp_ids_p:
            continue  # only hindered phenols get a neighbor list
        distances = line[5:]
        # [::-1] reverses the ascending argsort result, i.e. descending order.
        # NOTE(review): values are strings, so the ordering is lexicographic -
        # verify this matches numeric order for the file's number format.
        index_sorted = numpy.argsort(distances)[::-1]
        for index in index_sorted:
            n = header[index]
            if key != n and n in p_data:
                neighbors_p.setdefault(key,[]).append([n, distances[index]])


## ***** 3. Chemotyper *****
# Builds neighbors_c: hindered-phenol ID -> list of [neighbor ID, value],
# ordered by descending value, restricted to neighbors present in p_data.
# Unlike the MoSS section, no minimum value is required here.
_hp_ids_c = set(all_hp)  # O(1) membership instead of scanning the list per pair
neighbors_c = {}

with open(path+'HinderedPhenols-ReadAcross/DistanceFiles/Cerapp-ReadAcross-Chemotyper-TDs.csv','r') as f3:
    readCSV3 = csv.reader(f3, delimiter=',')
    # From column 1 on, the title row carries the ID of each neighbor column.
    header = next(readCSV3)[1:]

    for line in readCSV3:
        key = line[0]  # CERAPP ID
        if key not in _hp_ids_c:
            continue  # only hindered phenols get a neighbor list
        distances = line[1:]
        # [::-1] reverses the ascending argsort result, i.e. descending order.
        # NOTE(review): values are strings, so the ordering is lexicographic -
        # verify this matches numeric order for the file's number format.
        index_sorted = numpy.argsort(distances)[::-1]
        for index in index_sorted:
            n = header[index]
            if key != n and n in p_data:
                neighbors_c.setdefault(key,[]).append([n, distances[index]])

     
#%%
######################################################################     
## ********** Global Filtering: Chemical level screening *************
######################################################################
     
     
## Output files for the chemical-level ("global") screening.
## Both handles stay open; they are closed after the screening loop.

# 1. Summary statistics: one row per method and analog cut-off
n1 = open(path + 'HinderedPhenols-ReadAcross/Number/RA-HP-Global-Summary-Dist.csv', 'w')
writeCSV1 = csv.writer(n1)
writeCSV1.writerow((
    'Method', 'Cut-Off (No. of Analogs)', 'Accuracy', 'Balanced Accuracy',
    'Kappa Coeff.', 'Sensitivity', 'Specificity',
    'True Positives', 'False Positives', 'True Negatives', 'False Negatives',
    'Total',
))

# 2. Detailed prediction for each chemical
n2 = open(path + 'HinderedPhenols-ReadAcross/Number/RA-HP-Global-Detail-Dist.csv', 'w')
writeCSV2 = csv.writer(n2)
writeCSV2.writerow((
    'Hindered Phenol (CERAPP ID)', 'True Binding', 'Number of Lit. Sources',
    'Method', 'Number of Analogs', 'Analogs', 'Analog Prediction',
))

                    
## Dictionary of select MOE properties of each chemical with >= 4 data sources
# all_p_moe : ID -> 9 property strings, for every ID in p_data
# hp_moe    : ID -> the same 9 property strings, for every ID in all_hp
# Index meaning according to the original author's note:
#   0: class, 1: apol, 2: donacc, 3: logp, 4: tpsa, 5: vol, 6: acc, 7: don, 8: pol
all_p_moe = {}
hp_moe = {}
_hp_ids_moe = set(all_hp)  # O(1) membership instead of scanning the list per row

with open(path+'HinderedPhenols-ReadAcross/cerapp_moe.txt','r') as f:
    readCSV = csv.reader(f, delimiter=',')
    for line in readCSV:
        chem_id = line[0]
        if chem_id in p_data:
            # Explicit indexing (not a slice) so a short row still raises.
            all_p_moe[chem_id] = [line[k] for k in range(1, 10)]
        if chem_id in _hp_ids_moe:
            hp_moe[chem_id] = [line[k] for k in range(1, 10)]

# Screen chemicals for each descriptor set and calculate RA majority vote prediction metrics

## Thresholds for screening variables to determine optimum thresholds
#import itertools
#s1 = [50, 100]; s2 = [1,2,5, 100]; s3 = [5, 6,10]; #s4 = [25, 50, 100]
#s_thresholds = list(itertools.product(s1,s1,s3))
#
acc_m = []; acc_p = []; acc_c = []; acc_all = []
#
#for s_threshold in s_thresholds:

# Errors that mean "this chemical cannot be scored by this method" (missing
# neighbors or MOE record, unparsable number, zero reference value, no analog
# passed the filter).  The original code used a bare `except:` for these.
_GLOBAL_SKIP_ERRORS = (KeyError, IndexError, ValueError, ZeroDivisionError)

# Summary-file label of the consensus over all three descriptor sets.
_GLOBAL_CONSENSUS = 'MoSS+PubChem+Chemotyper'


def _global_majority_vote(labels):
    """Return the most frequent label; a tie between the two most frequent
    labels is resolved to '1' (binder).

    The original tie-break was written `prediction == 1`, a comparison with
    no effect, so ties fell back to whatever order Counter happened to yield.
    Raises IndexError when `labels` is empty.
    """
    ranked = Counter(labels).most_common()
    winner = ranked[0][0]
    if len(ranked) > 1 and ranked[0][1] == ranked[1][1]:
        winner = '1'
    return winner


def _global_tally(counts, truth, prediction):
    """Increment the confusion-matrix cell matching (truth, prediction).

    Label pairs other than '0'/'1' are ignored, as in the original code.
    """
    if truth == '1' and prediction == '1':
        counts['tp'] += 1
    elif truth == '1' and prediction == '0':
        counts['fn'] += 1
    elif truth == '0' and prediction == '0':
        counts['tn'] += 1
    elif truth == '0' and prediction == '1':
        counts['fp'] += 1


def _global_moe_diffs(target, cand):
    """Return (logP diff, volume % diff, donacc diff) between two MOE records.

    All seven differences of the original script are still evaluated even
    though only three are used by the filter, so an unparsable or zero
    TPSA/apol value raises exactly as before.
    NOTE(review): consider dropping the unused ones once it is confirmed that
    no chemical relies on them to be skipped.
    """
    tpsa_pct = 100*abs(float(target[4]) - float(cand[4]))/float(target[4])
    vol_pct = 100*abs(float(target[5]) - float(cand[5]))/float(target[5])
    logp = abs(float(target[3]) - float(cand[3]))
    don = abs(float(target[7]) - float(cand[7]))
    acc = abs(float(target[6]) - float(cand[6]))
    donacc = abs(float(target[2]) - float(cand[2]))
    apol_pct = 100*abs(float(target[1]) - float(cand[1]))/float(target[1])
    return logp, vol_pct, donacc


def _global_screen(hp, neighbor_table, threshold):
    """Pick up to `threshold` analogs of `hp` and vote on their class.

    Neighbors are visited in the stored order; one is accepted when
    logP diff <= 1, volume diff <= 100 % and donacc diff <= 6.

    Returns (accepted, prediction).  `prediction` is None when the chemical
    cannot be scored; `accepted` then holds whatever was accepted before the
    failure, which still feeds the consensus vote (original behaviour).
    """
    accepted = []
    votes = []
    try:
        target = hp_moe[hp]
        for n in [x[0] for x in neighbor_table[hp]]:
            cand = all_p_moe[n]
            logp, vol_pct, donacc = _global_moe_diffs(target, cand)
            if logp <= 1 and vol_pct <= 100 and donacc <= 6:
                accepted.append(n)
                votes.append(cand[0])
            if len(accepted) >= threshold:
                break
        prediction = _global_majority_vote(votes)
    except _GLOBAL_SKIP_ERRORS:
        return accepted, None
    return accepted, prediction


# Screen chemicals for each descriptor set, in the order rows are written.
_GLOBAL_METHODS = (('MoSS', neighbors_m),
                   ('Chemotyper', neighbors_c),
                   ('PubChem', neighbors_p))

thresholds = range(1,11)
for threshold in thresholds:
    counts = {}
    for label in ('MoSS', 'Chemotyper', 'PubChem', _GLOBAL_CONSENSUS):
        counts[label] = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}

    for hp in all_hp:
        truth = hp_data[hp][1]
        analogs = {}
        for label, table in _GLOBAL_METHODS:
            accepted, prediction = _global_screen(hp, table, threshold)
            analogs[label] = accepted
            if prediction is None:
                continue  # not scorable by this method: no tally, no row
            _global_tally(counts[label], truth, prediction)
            writeCSV2.writerow([hp, truth, '4', \
                label, threshold, accepted, prediction])

        # Consensus prediction from all M/P/C analogs (each analog votes
        # once, with its class taken from p_data).
        neighbors_all = analogs['MoSS'] + analogs['PubChem'] + analogs['Chemotyper']
        try:
            prediction = _global_majority_vote(
                [p_data[neighbor][1] for neighbor in set(neighbors_all)])
        except _GLOBAL_SKIP_ERRORS:
            continue  # no analog from any method
        _global_tally(counts[_GLOBAL_CONSENSUS], truth, prediction)
        writeCSV2.writerow([hp, truth, '4', \
            'M & C & P', threshold, neighbors_all, prediction])

    for label in ('MoSS', 'Chemotyper', 'PubChem', _GLOBAL_CONSENSUS):
        c = counts[label]
        scores = metrics(c['tp'], c['tn'], c['fp'], c['fn'])
        # Column order follows the header: TP, FP, TN, FN.  The consensus row
        # previously wrote TN and FP in each other's column.
        writeCSV1.writerow([label, threshold, scores[1], scores[4], scores[5],
                            scores[2], scores[3],
                            c['tp'], c['fp'], c['tn'], c['fn'], scores[0]])

n2.close()
n1.close()
                    
#%%
################################################################## 
## ************R-group level screening and predictions************
################################################################## 

## Read R-group properties
# Every table maps phenol ID (file column 1) to the list
#   [binding, h_pka, h_pkb, kierflex, logp, tpsa, vol, wt, homo, lumo,
#    h_acc, h_don, h_ad]
# The tuples below give, for each list position, the file column it is
# read from.
# NOTE(review): r1.txt is read with tpsa/vol/wt in columns 8-10 and homo/lumo
# in 11-12, whereas r2-r6 are read with homo/lumo in 8-9 and tpsa/vol/wt in
# 10-12.  This mirrors the original code - confirm the files really differ.
_R1_COLUMNS = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
_R2_TO_R6_COLUMNS = (3, 4, 5, 6, 7, 10, 11, 12, 8, 9, 13, 14, 15)


def _read_rgroup_table(filename, columns):
    """Load one R-group property file into a dict keyed by phenol ID.

    The first row is skipped as a title row.  A row with fewer columns than
    required raises IndexError, as the original per-file loops did.
    """
    table = {}
    with open(path+'HinderedPhenols-ReadAcross/R-groupAnalysis/'+filename) as f:
        readCSV = csv.reader(f, delimiter=',')
        next(readCSV)
        for line in readCSV:
            table[line[1]] = [line[col] for col in columns]
    return table


R1 = _read_rgroup_table('r1.txt', _R1_COLUMNS)
R2 = _read_rgroup_table('r2.txt', _R2_TO_R6_COLUMNS)
R3 = _read_rgroup_table('r3.txt', _R2_TO_R6_COLUMNS)
R4 = _read_rgroup_table('r4.txt', _R2_TO_R6_COLUMNS)
R5 = _read_rgroup_table('r5.txt', _R2_TO_R6_COLUMNS)
R6 = _read_rgroup_table('r6.txt', _R2_TO_R6_COLUMNS)


## Output files for the R-group-level ("local") screening.
## Both handles stay open; they are closed after the screening loop.

# 1. Summary statistics: one row per method and analog cut-off
n1 = open(path + 'HinderedPhenols-ReadAcross/Number/RA-HP-Local-Summary-Dist.csv', 'w')
writeCSV1 = csv.writer(n1)
writeCSV1.writerow((
    'Method', 'Cut-Off (No. of Analogs)', 'Accuracy', 'Balanced Accuracy',
    'Kappa Coeff.', 'Sensitivity', 'Specificity',
    'True Positives', 'False Positives', 'True Negatives', 'False Negatives',
    'Total',
))

# 2. Detailed prediction for each chemical
n2 = open(path + 'HinderedPhenols-ReadAcross/Number/RA-HP-Local-Detail-Dist.csv', 'w')
writeCSV2 = csv.writer(n2)
writeCSV2.writerow((
    'Hindered Phenol (CERAPP ID)', 'True Binding', 'Number of Lit. Sources',
    'Method', 'Number of Analogs', 'Analogs', 'Analog Prediction',
))
                    
## Screen chemicals for each descriptor set and calculate RA majority vote prediction metrics

# Thresholds for screening variables
#import itertools
#s1 = [2,3, 100]; s2 = [100, 1000];  s3 = [2,5, 100]; s4 = [2, 100]
#s_thresholds = list(itertools.product(s1,s1,s3,s2, s1,s1,s3,s2,s3,s4))
acc_m = []; acc_p = []; acc_c = []; acc_all = []

#for s in s_thresholds:

# Errors that mean "this value / chemical cannot be evaluated" (missing
# record, short row, unparsable number, zero reference value, no analog
# passed the filter).  The original code used a bare `except:` for these.
_LOCAL_SKIP_ERRORS = (KeyError, IndexError, ValueError, ZeroDivisionError)


def _local_majority_vote(labels):
    """Return the most frequent label; a tie between the two most frequent
    labels is resolved to '1' (binder).

    The original tie-break was written `prediction == 1`, a comparison with
    no effect, so ties fell back to whatever order Counter happened to yield.
    Raises IndexError when `labels` is empty.
    """
    ranked = Counter(labels).most_common()
    winner = ranked[0][0]
    if len(ranked) > 1 and ranked[0][1] == ranked[1][1]:
        winner = '1'
    return winner


def _local_tally(counts, truth, prediction):
    """Increment the confusion-matrix cell matching (truth, prediction).

    Label pairs other than '0'/'1' are ignored, as in the original code.
    """
    if truth == '1' and prediction == '1':
        counts['tp'] += 1
    elif truth == '1' and prediction == '0':
        counts['fn'] += 1
    elif truth == '0' and prediction == '0':
        counts['tn'] += 1
    elif truth == '0' and prediction == '1':
        counts['fp'] += 1


def _local_rgroup_diffs(table, hp, n):
    """Return (h_pkb diff, logP diff, donacc diff) for one R-group table.

    All eight differences of the original script are evaluated; if any of
    them cannot be computed, every difference falls back to 0 (i.e. the
    R-group imposes no restriction).
    NOTE(review): because the unused volume/TPSA percentages divide by the
    target's value, a target R-group with volume or TPSA equal to 0 also
    triggers this fall-back and disables the filter for that R-group.  This
    is preserved from the original - confirm whether it is intended.
    """
    try:
        ref = table[hp]
        cand = table[n]
        vol_pct = 100*abs(float(ref[6]) - float(cand[6]))/float(ref[6])
        hpkb = abs(float(ref[2]) - float(cand[2]))
        logp = abs(float(ref[4]) - float(cand[4]))
        donacc = abs(float(ref[12]) - float(cand[12]))
        don = abs(float(ref[11]) - float(cand[11]))
        acc = abs(float(ref[10]) - float(cand[10]))
        tpsa_pct = 100*abs(float(ref[5]) - float(cand[5]))/float(ref[5])
        kierflex = abs(float(ref[3]) - float(cand[3]))
    except _LOCAL_SKIP_ERRORS:
        return 0, 0, 0
    return hpkb, logp, donacc


def _local_is_analog(hp, n):
    """Apply the R-group similarity filter between `hp` and neighbor `n`."""
    r2_hpkb, r2_logp, r2_donacc = _local_rgroup_diffs(R2, hp, n)
    r3_hpkb, r3_logp, r3_donacc = _local_rgroup_diffs(R3, hp, n)
    r4_donacc = _local_rgroup_diffs(R4, hp, n)[2]
    try:
        r6_logp = abs(float(R6[hp][4]) - float(R6[n][4]))
    except _LOCAL_SKIP_ERRORS:
        r6_logp = 0
    return (r3_hpkb <= 2 and r3_logp <= 3 and r3_donacc <= 2
            and r2_hpkb <= 2 and r2_logp <= 3 and r2_donacc <= 2
            and r4_donacc <= 2
            and r6_logp <= 2)


def _local_screen(hp, neighbor_table, threshold):
    """Pick up to `threshold` analogs of `hp` and vote on their binding class.

    Neighbors are visited in the stored order and vote with p_data[n][1].
    Returns (accepted, prediction); `prediction` is None when `hp` has no
    neighbor list or no neighbor passed the filter.
    """
    accepted = []
    votes = []
    try:
        for n in [x[0] for x in neighbor_table[hp]]:
            if _local_is_analog(hp, n):
                accepted.append(n)
                votes.append(p_data[n][1])
            if len(accepted) >= threshold:
                break
        prediction = _local_majority_vote(votes)
    except _LOCAL_SKIP_ERRORS:
        return accepted, None
    return accepted, prediction


# Screen chemicals for each descriptor set, in the order rows are written.
# No combined M/C/P prediction is produced in the local screening.
_LOCAL_METHODS = (('MoSS', neighbors_m),
                  ('Chemotyper', neighbors_c),
                  ('PubChem', neighbors_p))

thresholds = range(1,11)
for threshold in thresholds:
    counts = {}
    for label, table in _LOCAL_METHODS:
        counts[label] = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}

    for hp in all_hp:
        truth = hp_data[hp][1]
        for label, table in _LOCAL_METHODS:
            accepted, prediction = _local_screen(hp, table, threshold)
            if prediction is None:
                continue  # not scorable by this method: no tally, no row
            _local_tally(counts[label], truth, prediction)
            writeCSV2.writerow([hp, truth, '4', \
                label, threshold, accepted, prediction])

    for label, table in _LOCAL_METHODS:
        c = counts[label]
        scores = metrics(c['tp'], c['tn'], c['fp'], c['fn'])
        # Column order follows the header: TP, FP, TN, FN.
        writeCSV1.writerow([label, threshold, scores[1], scores[4], scores[5],
                            scores[2], scores[3],
                            c['tp'], c['fp'], c['tn'], c['fn'], scores[0]])

n1.close()
n2.close()

#%%
#################################################################
## ********************* Calculating Kappa **********************
#################################################################

import csv
from collections import Counter
import numpy

## Output files
# 1. Summary statistics

def _read_pubchem_calls(filename, n_analogs, lit_sources=None):
    """Collect the PubChem predictions stored in one "Detail" CSV.

    The first row is skipped as a header.  A row is used when column 3 is
    'PubChem' and column 4 equals ``n_analogs``; when ``lit_sources`` is
    given, column 2 must equal that string as well.

    Returns a dict mapping column 0 (phenol id) to ``int(column 6)`` (the
    prediction).  If an id matches more than once, the last row wins.
    """
    calls = {}
    with open(filename) as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader)
        for row in reader:
            if lit_sources is not None and row[2] != lit_sources:
                continue
            if row[3] == 'PubChem' and int(row[4]) == n_analogs:
                calls[row[0]] = int(row[6])
    return calls


def _agreement_counts(reference, other, phenols):
    """Cross-tabulate two 0/1 prediction dicts over ``phenols``.

    Only phenols present in BOTH dicts are counted.  ``reference`` plays the
    role of the "truth" axis: (1, 1) is a true positive, (0, 0) a true
    negative, (0, 1) a false positive and (1, 0) a false negative.  Any other
    value pair is ignored.

    Returns (true_pos, true_neg, false_pos, false_neg), i.e. the argument
    order expected by ``metrics``.
    """
    t_p = t_n = f_p = f_n = 0
    for phenol in phenols:
        if phenol not in reference or phenol not in other:
            continue
        pair = (reference[phenol], other[phenol])
        if pair == (1, 1):
            t_p += 1
        elif pair == (0, 0):
            t_n += 1
        elif pair == (0, 1):
            f_p += 1
        elif pair == (1, 0):
            f_n += 1
    return t_p, t_n, f_p, f_n


def _read_pubchem_summary(filename, n_analogs, kappa_col, total_col, lit_sources=None):
    """Look up (kappa, total) for the PubChem row of one "Summary" CSV.

    A row is used when column 0 is 'PubChem' and column 1 equals
    ``n_analogs``; when ``lit_sources`` is given, column 2 must equal it too.
    No header row is skipped: the header never matches because the string
    tests short-circuit before ``int()`` is applied to it.

    Returns (None, None) when no row matches, so the caller writes blank
    cells instead of reusing the values found for a previous cut-off.
    """
    kappa = total = None
    with open(filename) as handle:
        for row in csv.reader(handle, delimiter=','):
            if lit_sources is not None and row[2] != lit_sources:
                continue
            if row[0] == 'PubChem' and int(row[1]) == n_analogs:
                kappa = float(row[kappa_col])
                total = float(row[total_col])
    return kappa, total


with open(path+'HinderedPhenols-ReadAcross/Number/RA-HP-Kappa-Dist.csv','w') as n1:
    writeCSV1 = csv.writer(n1)
    writeCSV1.writerow(['No. of Analogs', 'Model-Pair', 'Kappa Coeff.', 'Total Chemicals Predicted'])

    n_analogs = range(1,11)
    for n in n_analogs:
        ## PubChem predictions of the three filtering schemes at this number of analogs
        lit_d = _read_pubchem_calls(path+'HinderedPhenols-ReadAcross/Number/RA-HP-LitDataSources-Detail-Dist.csv', n, lit_sources='4')
        g = _read_pubchem_calls(path+'HinderedPhenols-ReadAcross/Number/RA-HP-Global-Detail-Dist.csv', n)
        l = _read_pubchem_calls(path+'HinderedPhenols-ReadAcross/Number/RA-HP-Local-Detail-Dist.csv', n)

        ## Calculate variables for Kappa calculation
        # All three pairings are evaluated over the phenols present in lit_d.
        # The Global-Local pairing additionally requires the phenol to be in
        # both g and l; previously a phenol missing from l raised KeyError.
        metrics_lg = metrics(*_agreement_counts(lit_d, g, lit_d))  # Lit-Global
        metrics_ll = metrics(*_agreement_counts(lit_d, l, lit_d))  # Lit-Local
        metrics_gl = metrics(*_agreement_counts(g, l, lit_d))      # Global-Local

        writeCSV1.writerow([n, 'DataQuality + Global', metrics_lg[5], metrics_lg[0]])
        writeCSV1.writerow([n, 'DataQuality + Local', metrics_ll[5], metrics_ll[0]])
        writeCSV1.writerow([n, 'Global + Local', metrics_gl[5], metrics_gl[0]])

        ## Kappa / total already stored in each scheme's own summary file
        # NOTE(review): the summary files are read from 'ReadAcross-Project/'
        # while the detail files above come from 'HinderedPhenols-ReadAcross/'
        # -- confirm both directories exist under the same `path`.
        lit_d_kappa, lit_d_total = _read_pubchem_summary(
            path+'ReadAcross-Project/Number/RA-HP-LitDataSources-Summary-Dist.csv', n, 5, 12, lit_sources='4')
        g_kappa, g_total = _read_pubchem_summary(
            path+'ReadAcross-Project/Number/RA-HP-Global-Summary-Dist.csv', n, 4, 11)
        l_kappa, l_total = _read_pubchem_summary(
            path+'ReadAcross-Project/Number/RA-HP-Local-Summary-Dist.csv', n, 4, 11)

        writeCSV1.writerow([n, 'No Filter + DataQuality', lit_d_kappa, lit_d_total])
        writeCSV1.writerow([n, 'No Filter + Global', g_kappa, g_total ])
        writeCSV1.writerow([n, 'No Filter + Local', l_kappa, l_total])

#%%
#################################################################
## ***** Calculating Concordance in Analogs *********************
#################################################################

import csv
from collections import Counter
import numpy

path = 'W:/Rapid Tox/'
#path = '/share/home/ppradeep/Rapid Tox/'


## cerapp_casrn: CERAPP id (column 0) -> list of CASRNs (column 1)
## cerapp_auc:   CERAPP id -> AUC string, '' until a SuperMatrix row supplies one
## Only rows whose column 10 is non-empty and >= 4 are kept
## (presumably the number of literature sources -- TODO confirm against the file).
cerapp_casrn = {}
cerapp_auc = {}
with open(path+'ReadAcross-Project/CEARAPP_exp_data/CERAPP_Phenols_AllCAS_QSARready.csv','r') as f0:
    readCSV0 = csv.reader(f0, delimiter=',')
    header = next(readCSV0)
    for line in readCSV0:
        if line[10] != '' and int(line[10]) >= 4:
            cerapp_casrn.setdefault(line[0],[]).append(line[1])
            cerapp_auc[line[0]] = ''

## Reverse index CASRN -> CERAPP id, built once instead of rescanning
## cerapp_casrn for every SuperMatrix row.  setdefault keeps the first id met
## in dictionary order, which is the id the per-row scan used to return.
casrn_to_cerapp = {}
for cerapp_id in cerapp_casrn.keys():
    for casrn in cerapp_casrn[cerapp_id]:
        casrn_to_cerapp.setdefault(casrn, cerapp_id)

## Fill in the AUC (SuperMatrix column 17) of every CERAPP chemical whose
## CASRN appears in SuperMatrix column 1; rows with an unknown CASRN are skipped.
with open(path+'ReadAcross-Project/SuperMatrix.csv','r') as f0:
    readCSV0 = csv.reader(f0, delimiter=',')
    header = next(readCSV0)
    for line in readCSV0:
        auc = line[17]
        cerapp_id = casrn_to_cerapp.get(line[1])
        if cerapp_id is not None:
            cerapp_auc[cerapp_id.strip('[]\'')] = auc


def _read_nearest_analog(filename, lit_sources=None):
    """Read the single-analog PubChem rows of one "Detail" CSV.

    The first row is skipped as a header.  A row is used when column 3 is
    'PubChem' and column 4 is 1; when ``lit_sources`` is given, column 2 must
    equal that string as well.  Column 5 holds the analog id wrapped in
    list-repr characters, which are stripped.

    Returns a dict mapping column 0 (phenol id) to ``[analog id, dist]``,
    where ``dist`` is element 1 of the first ``neighbors_p[phenol id]`` entry
    whose element 0 equals the analog id.  ``neighbors_p`` is defined outside
    this section.  A phenol absent from ``neighbors_p`` raises KeyError and an
    analog absent from its neighbour list raises IndexError.
    """
    nearest = {}
    with open(filename) as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader)
        for row in reader:
            if lit_sources is not None and row[2] != lit_sources:
                continue
            if row[3] == 'PubChem' and int(row[4]) == 1:
                phenol_id = row[0]
                analog = row[5].strip('[]\'')
                dist = [x[1] for x in neighbors_p[phenol_id] if x[0] == analog]
                nearest[phenol_id] = [analog, dist[0]]
    return nearest


## Nearest analog chosen under each filtering scheme
lit_d = _read_nearest_analog(path+'ReadAcross-Project/Number/RA-HP-LitDataSources-Detail-Dist.csv', lit_sources='4')
g = _read_nearest_analog(path+'ReadAcross-Project/Number/RA-HP-Global-Detail-Dist.csv')
l = _read_nearest_analog(path+'ReadAcross-Project/Number/RA-HP-Local-Detail-Dist.csv')


## Output file
def _disagreement_row(label, phenol, first, second):
    """Build one output row for a target whose two schemes picked different analogs.

    ``first`` and ``second`` are ``[analog id, distance]`` pairs.  For the
    target and for each analog the row carries the id, its CASRN list, its AUC
    and its binding class (``p_data[id][1]``); each analog also carries its
    distance.  Raises KeyError if an id is missing from ``cerapp_casrn``,
    ``cerapp_auc`` or ``p_data``.
    """
    row = [label, phenol, cerapp_casrn[phenol], cerapp_auc[phenol], p_data[phenol][1]]
    for analog, dist in (first, second):
        row.extend([analog, cerapp_casrn[analog], cerapp_auc[analog], dist, p_data[analog][1]])
    return row


def _percent_different(disagree, agree):
    """Return the percentage of compared targets that disagree.

    Uses true division; the earlier ``float(100*a/(a+b))`` form divided two
    ints first and therefore truncated the result under Python 2.  Returns NaN
    when nothing was compared instead of raising ZeroDivisionError.
    """
    compared = agree + disagree
    if compared == 0:
        return float('nan')
    return 100.0*disagree/compared


agree_dg = 0; disagree_dg = 0; agree_dl = 0; disagree_dl = 0; agree_gl = 0; disagree_gl = 0

with open(path+'ReadAcross-Project/Number/RA-HP-DifferentAnalogs-Dist.csv','wb') as n1:
    writeCSV1 = csv.writer(n1)
    writeCSV1.writerow(['Filtering', 'Target', 'Target CASRN', 'Target Agonist-AUC (when available)', 'Target True Binding', \
                        'Analog-1 ', 'Analog-1 CASRN', 'Analog-1 Agonist-AUC (when available)', 'Analog-1 Distance', 'Analog-1 True Binding', \
                        'Analog-2', 'Analog-2 CASRN', 'Analog-2 Agonist-AUC (when available)','Analog-2 Distance', 'Analog-2 True Binding'])

    ## Data-quality selection compared with the global and with the local selection
    for phenol in lit_d.keys():
        if phenol in g:
            if lit_d[phenol][0] == g[phenol][0]:
                agree_dg = agree_dg + 1
            else:
                disagree_dg = disagree_dg + 1
                writeCSV1.writerow(_disagreement_row('Global Filtering', phenol, lit_d[phenol], g[phenol]))

        if phenol in l:
            if lit_d[phenol][0] == l[phenol][0]:
                agree_dl = agree_dl + 1
            else:
                disagree_dl = disagree_dl + 1
                writeCSV1.writerow(_disagreement_row('Local Filtering', phenol, lit_d[phenol], l[phenol]))

    ## Global selection compared with the local selection
    for phenol in g.keys():
        if phenol in l:
            if g[phenol][0] == l[phenol][0]:
                agree_gl = agree_gl + 1
            else:
                disagree_gl = disagree_gl + 1
                writeCSV1.writerow(_disagreement_row('Different between Global & Local', phenol, g[phenol], l[phenol]))

different_dg = _percent_different(disagree_dg, agree_dg)
different_dl = _percent_different(disagree_dl, agree_dl)
different_gl = _percent_different(disagree_gl, agree_gl)

# Single pre-formatted string: prints the same text under Python 2's print
# statement and Python 3's print function.
print('%s %s %s' % (different_dg, different_dl, different_gl))
     
#%%
################################################################## 
## ********* Global and local screening and predictions **********
################################################################## 

## Read R-group properties
# Every table maps phenol id (file column 1) to a 13-item list ordered as
#   0 binding, 1 h_pka, 2 h_pkb, 3 kierflex, 4 logp, 5 tpsa, 6 vol, 7 wt,
#   8 homo, 9 lumo, 10 h_acc, 11 h_don, 12 h_ad
# The tuples below give the file column feeding each of those 13 positions.
# NOTE(review): r1.txt is read with tpsa/vol/wt in columns 8-10 and homo/lumo
# in 11-12, whereas r2-r6 are read with homo/lumo in 8-9 and tpsa/vol/wt in
# 10-12 -- confirm that r1.txt really has a different column order.
_R1_COLUMNS = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
_R2_TO_R6_COLUMNS = (3, 4, 5, 6, 7, 10, 11, 12, 8, 9, 13, 14, 15)


def _read_rgroup_table(filename, columns):
    """Load one R-group property file into ``{phenol id: [13 property strings]}``.

    The first row is skipped as a header.  Values are kept as the strings read
    from the file.  If a phenol id repeats, the last row wins.  A row with
    fewer than 16 columns raises IndexError.
    """
    table = {}
    with open(filename) as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader)
        for row in reader:
            table[row[1]] = [row[col] for col in columns]
    return table


R1 = _read_rgroup_table(path+'ReadAcross-Project/R-groupAnalysis/r1.txt', _R1_COLUMNS)
R2 = _read_rgroup_table(path+'ReadAcross-Project/R-groupAnalysis/r2.txt', _R2_TO_R6_COLUMNS)
R3 = _read_rgroup_table(path+'ReadAcross-Project/R-groupAnalysis/r3.txt', _R2_TO_R6_COLUMNS)
R4 = _read_rgroup_table(path+'ReadAcross-Project/R-groupAnalysis/r4.txt', _R2_TO_R6_COLUMNS)
R5 = _read_rgroup_table(path+'ReadAcross-Project/R-groupAnalysis/r5.txt', _R2_TO_R6_COLUMNS)
R6 = _read_rgroup_table(path+'ReadAcross-Project/R-groupAnalysis/r6.txt', _R2_TO_R6_COLUMNS)

## Dictionary of select MOE properties of each chemical with >= 4 data sources
# all_p_moe: every chemical that is a key of p_data
# hp_moe:    every chemical listed in all_hp
# Each value is the list of file columns 1-9 (as strings); the original
# author's key for the list positions is
#   0: class, 1: apol, 2: donacc, 3: logp, 4: tpsa, 5: vol, 6: acc, 7: don, 8: pol
# No header row is skipped; a header is only ignored because its first cell
# is not a known chemical id.
all_p_moe = {}
hp_moe = {}
hindered_ids = set(all_hp)  # set membership instead of scanning the list for every row
with open(path+'ReadAcross-Project/cerapp_moe.txt','r') as f:
    readCSV = csv.reader(f, delimiter=',')
    for line in readCSV:
        chem_id = line[0]
        if chem_id in p_data:
            all_p_moe[chem_id] = [line[1], line[2], line[3], line[4], line[5], line[6], line[7], line[8], line[9]]
        if chem_id in hindered_ids:
            hp_moe[chem_id] = [line[1], line[2], line[3], line[4], line[5], line[6], line[7], line[8], line[9]]

## Output files
# 1. Summary statistics (one row per method and cut-off)
# 2. Detailed prediction for each chemical

# Failures that mean "data for this chemical is missing or unusable": unknown
# id, short row, non-numeric text, or a zero reference value in a % difference.
_SCREEN_ERRORS = (KeyError, IndexError, ValueError, ZeroDivisionError)

# Positions inside the 13-item R-group property lists.
_RGROUP_ABS_COLS = (('hpkb', 2), ('logP', 4), ('donacc', 12), ('don', 11), ('acc', 10), ('kierflex', 3))
_RGROUP_PCT_COLS = (('vol', 6), ('tpsa', 5))

# (truth, prediction) -> confusion-matrix cell; any other pair is not counted.
_CELLS = {('1', '1'): 'tp', ('1', '0'): 'fn', ('0', '0'): 'tn', ('0', '1'): 'fp'}


def _abs_diff(target, analog, col):
    """Absolute difference of one numeric property between target and analog."""
    return abs(float(target[col]) - float(analog[col]))


def _pct_diff(target, analog, col):
    """Difference of one property as a percentage of the target's value."""
    return 100*abs(float(target[col]) - float(analog[col]))/float(target[col])


def _rgroup_diffs(table, hp, n):
    """Return the eight R-group property differences between ``hp`` and ``n``.

    If ANY of the eight cannot be computed, every difference is reported as 0,
    so the R-group then never blocks an analog.
    NOTE(review): a zero vol or tpsa on the target is enough to zero the hpkb
    and logP differences as well -- kept as is, confirm it is intended.
    """
    try:
        target, analog = table[hp], table[n]
        diffs = {}
        for name, col in _RGROUP_ABS_COLS:
            diffs[name] = _abs_diff(target, analog, col)
        for name, col in _RGROUP_PCT_COLS:
            diffs[name] = _pct_diff(target, analog, col)
        return diffs
    except _SCREEN_ERRORS:
        return dict.fromkeys([name for name, col in _RGROUP_ABS_COLS + _RGROUP_PCT_COLS], 0)


def _passes_screen(hp, n):
    """Decide whether neighbour ``n`` is accepted as an analog of target ``hp``.

    Accepted when the R-group test holds (R2 and R3: hpkb diff <= 2 and logP
    diff <= 3; R6: logP diff <= 2) OR the whole-molecule MOE test holds (logP
    diff <= 1, volume diff <= 100 % and donacc diff <= 6).  The R4 differences
    that used to be computed here never entered the test and were dropped.

    The MOE differences are always evaluated, so missing or unusable MOE data
    raises one of ``_SCREEN_ERRORS`` for the caller to handle.
    """
    r2 = _rgroup_diffs(R2, hp, n)
    r3 = _rgroup_diffs(R3, hp, n)
    try:
        r6_logp = _abs_diff(R6[hp], R6[n], 4)
    except _SCREEN_ERRORS:
        r6_logp = 0

    target, analog = hp_moe[hp], all_p_moe[n]
    vol_diff = _pct_diff(target, analog, 5)
    logp_diff = _abs_diff(target, analog, 3)
    donacc_diff = _abs_diff(target, analog, 2)

    rgroup_ok = r3['hpkb'] <= 2 and r3['logP'] <= 3 \
        and r2['hpkb'] <= 2 and r2['logP'] <= 3 \
        and r6_logp <= 2
    molecule_ok = logp_diff <= 1 and vol_diff <= 100 and donacc_diff <= 6
    return rgroup_ok or molecule_ok


def _select_analogs(hp, neighbors, threshold):
    """Walk the neighbours of ``hp`` in stored order and keep those that pass.

    Stops as soon as ``threshold`` analogs are kept.  Returns
    ``(analog ids, their binding classes from p_data)``.
    """
    candidates = [entry[0] for entry in neighbors[hp]]
    kept = []; calls = []
    for n in candidates:
        if _passes_screen(hp, n):
            kept.append(n)
            calls.append(p_data[n][1])
        if len(kept) >= threshold:
            break
    return kept, calls


def _majority_call(calls):
    """Return the most frequent class in ``calls``; a tie is called '1'.

    The tie-break used to be written ``prediction == 1`` (a comparison with no
    effect), so ties fell through to whichever class Counter listed first.
    Raises IndexError when ``calls`` is empty.
    """
    ranked = Counter(calls).most_common()
    if len(ranked) > 1 and ranked[0][1] == ranked[1][1]:
        return '1'
    return ranked[0][0]


# Method label -> neighbour dictionary, in the order rows are written.
_METHODS = (('MoSS', neighbors_m), ('Chemotyper', neighbors_c), ('PubChem', neighbors_p))

# Not used in this section; kept defined in case a later cell refers to them.
acc_m = []; acc_p = []; acc_c = []; acc_all = []
thresholds = range(1,11)

with open(path+'ReadAcross-Project/Number/RA-HP-GlobalLocal-Summary.csv','w') as n1:
    writeCSV1 = csv.writer(n1)
    writeCSV1.writerow(['Method', 'Cut-Off (No. of Analogs)', 'Accuracy', 'Balanced Accuracy', 'Sensitivity', 'Specificity', \
                        'True Positives','False Positives', 'True Negatives', 'False Negatives', 'Total'])

    with open(path+'ReadAcross-Project/Number/RA-HP-GlobalLocal-Detail.csv','w') as n2:
        writeCSV2 = csv.writer(n2)
        writeCSV2.writerow(['Hindered Phenol (CERAPP ID)', 'True Binding', 'Number of Lit. Sources', \
                            'Method', 'Number of Analogs', 'Analogs', 'Analog Prediction'])

        for threshold in thresholds:
            counts = dict((label, {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}) for label, neighbors in _METHODS)

            for hp in all_hp:
                for label, neighbors in _METHODS:
                    # A target is skipped for this method when it has no
                    # neighbour list, when MOE / binding data is missing for it
                    # or for a neighbour examined before the cut-off is
                    # reached, or when no neighbour passes the screen.
                    try:
                        truth = hp_data[hp][1]
                        analogs, calls = _select_analogs(hp, neighbors, threshold)
                        prediction = _majority_call(calls)
                    except _SCREEN_ERRORS:
                        continue

                    cell = _CELLS.get((truth, prediction))
                    if cell is not None:
                        counts[label][cell] += 1
                    writeCSV2.writerow([hp, truth, '4', label, threshold, analogs, prediction])

            # Compute all three metric sets before writing, so a failure in
            # metrics() leaves no partial group of rows for this cut-off.
            stats = {}
            for label, neighbors in _METHODS:
                c = counts[label]
                stats[label] = metrics(c['tp'], c['tn'], c['fp'], c['fn'])
            for label, neighbors in _METHODS:
                c = counts[label]; s = stats[label]
                writeCSV1.writerow([label, threshold, s[1], s[4], s[2], s[3], c['tp'], c['fp'], c['tn'], c['fn'], s[0]])

